{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8. 如何对选股因子进行优化？"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 目录\n",
    "1. 选择Alpha003作为优化对象\n",
    "2. 如何设置优化目标？\n",
    "3. 如何进行参数设置？\n",
    "4. 如何输出最优结果？"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 选择Alpha006作为优化对象"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "           000001 000089 000402  000858  000895 600006 600029 600036 600050  \\\n",
      "date                                                                          \n",
      "2016-12-26   9.12    8.1  10.66  33.653  19.355   7.12   7.07  17.75   7.81   \n",
      "2016-12-27   9.08   8.11  10.58  33.643   19.43   7.04   7.03  17.71   7.55   \n",
      "2016-12-28   9.06   7.97  10.43  33.222   19.26   6.93   6.97  17.62   7.69   \n",
      "2016-12-29   9.08    7.9  10.34  33.163  19.534   6.92   6.99  17.49   7.54   \n",
      "2016-12-30    9.1      8   10.3   33.78  19.771   6.88   7.02   17.6   7.31   \n",
      "\n",
      "           601318  \n",
      "date               \n",
      "2016-12-26  35.12  \n",
      "2016-12-27  35.26  \n",
      "2016-12-28  35.29  \n",
      "2016-12-29  35.13  \n",
      "2016-12-30  35.43  \n",
      "date              \n",
      "2016-12-30  600006   -0.229996\n",
      "            600029    0.035984\n",
      "            600036   -0.293132\n",
      "            600050    0.075360\n",
      "            601318   -0.296788\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "def correlation(x,y,window=10):\n",
    "    return x.rolling(window).corr(y)\n",
    "\n",
    "def rank(df):\n",
    "    return df.rank(axis=1, pct=True)\n",
    "\n",
    "class alphas(object):\n",
    "    def __init__(self, pn_data):\n",
    "        \"\"\"\n",
    "        :传入参数 pn_data: pandas.Panel\n",
    "        \"\"\"\n",
    "        # 获取历史数据\n",
    "        self.open = pd.DataFrame(pn_data.minor_xs('open'), dtype=np.float64)\n",
    "        self.volume = pd.DataFrame(pn_data.minor_xs('volume'), dtype=np.float64)\n",
    "\n",
    "    # alpha006: (-1 * correlation(open, volume, 10))\n",
    "    def alpha006(self, c=10):\n",
    "        alpha = -1 * correlation(self.open, self.volume, c)\n",
    "        return alpha\n",
    "\n",
    "#传入数据\n",
    "\n",
    "if __name__ == '__main__':\n",
    "    import pandas as pd\n",
    "    import tushare as ts\n",
    "\n",
    "    codes = ['000001', '601318', '600029', '000089', '000402', \n",
    "             '000895', '600006', '000858', '600036', '600050']\n",
    "    stocks_dict = {}\n",
    "    for c in codes:\n",
    "        stock = ts.get_k_data(c, start='2016-01-01', end='2016-12-31', ktype='D', autype='qfq')\n",
    "        stock.index = pd.to_datetime(stock['date'], format='%Y-%m-%d')\n",
    "        stock.pop('date')\n",
    "        stocks_dict[c] = stock\n",
    "    \n",
    "    pn = pd.Panel(stocks_dict)\n",
    "    prices = pn.minor_xs('close')\n",
    "    alpha = alphas(pn).alpha006()\n",
    "    \n",
    "    \n",
    "    #转为Alphalens的数据格式\n",
    "    alpha = alpha.stack()\n",
    "    print prices.tail()\n",
    "    print alpha.tail()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 如何设置优化目标？"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import alphalens\n",
    "\n",
    "def opt(c, q):\n",
    "    alpha = alphas(pn).alpha006(c)\n",
    "    factor_data = alpha.stack()\n",
    "    factor_data = alphalens.utils.get_clean_factor_and_forward_returns(factor_data, prices, quantiles=q)\n",
    "    mean_return_by_q, std_by_q = alphalens.performance.mean_return_by_quantile(factor_data)\n",
    "    sharpe = mean_return_by_q/std_by_q\n",
    "    dct = sharpe.iloc[0].to_dict()\n",
    "    dct.update({'cor_period': c, 'quantile': q})\n",
    "    return dct"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 如何进行参数设置？"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "           1        10         5  cor_period  quantile\n",
      "0   0.913825  1.539255  0.331380           2         3\n",
      "1   0.913825  1.539255  0.331380           2         3\n",
      "2   0.913825  1.539255  0.331380           2         3\n",
      "3   0.760644  1.594924  0.356017           2         4\n",
      "4   0.760644  1.594924  0.356017           2         4\n",
      "5   0.760644  1.594924  0.356017           2         4\n",
      "6   1.171541  2.171023  0.826320           2         5\n",
      "7   1.171541  2.171023  0.826320           2         5\n",
      "8   1.171541  2.171023  0.826320           2         5\n",
      "9  -0.251774  0.801417  0.331025           3         3\n",
      "10 -0.251774  0.801417  0.331025           3         3\n",
      "11 -0.251774  0.801417  0.331025           3         3\n",
      "12 -0.306966  1.299807  0.702793           3         4\n",
      "13 -0.306966  1.299807  0.702793           3         4\n",
      "14 -0.306966  1.299807  0.702793           3         4\n",
      "15 -0.219944  1.088090 -0.077157           3         5\n",
      "16 -0.219944  1.088090 -0.077157           3         5\n",
      "17 -0.219944  1.088090 -0.077157           3         5\n",
      "18 -0.004063  0.629185 -0.343687           4         3\n",
      "19 -0.004063  0.629185 -0.343687           4         3\n",
      "20 -0.004063  0.629185 -0.343687           4         3\n",
      "21  0.500395  0.411412 -0.540968           4         4\n",
      "22  0.500395  0.411412 -0.540968           4         4\n",
      "23  0.500395  0.411412 -0.540968           4         4\n",
      "24  1.072324  1.035241  0.151935           4         5\n",
      "25  1.072324  1.035241  0.151935           4         5\n",
      "26  1.072324  1.035241  0.151935           4         5\n",
      "27  0.230672  1.039897  0.476541           5         3\n",
      "28  0.230672  1.039897  0.476541           5         3\n",
      "29  0.230672  1.039897  0.476541           5         3\n",
      "30  0.676630  1.272997 -0.042564           5         4\n",
      "31  0.676630  1.272997 -0.042564           5         4\n",
      "32  0.676630  1.272997 -0.042564           5         4\n",
      "33 -0.022219  1.583215 -0.123898           5         5\n",
      "34 -0.022219  1.583215 -0.123898           5         5\n",
      "35 -0.022219  1.583215 -0.123898           5         5\n"
     ]
    }
   ],
   "source": [
    "import itertools\n",
    "\n",
    "dict_list = []\n",
    "for param in itertools.product(range(2, 6), range(3, 6)):\n",
    "    dct = opt(*param)\n",
    "    for i in [1, 5, 10]:\n",
    "        dct[str(i)] = dct.pop(i)\n",
    "        dict_list.append(dct)\n",
    "# print dict_list\n",
    "\n",
    "result = pd.DataFrame(dict_list)\n",
    "\n",
    "print result"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 如何输出最优结果？"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "           1        10         5  cor_period  quantile\n",
      "6   1.171541  2.171023  0.826320           2         5\n",
      "7   1.171541  2.171023  0.826320           2         5\n",
      "8   1.171541  2.171023  0.826320           2         5\n",
      "24  1.072324  1.035241  0.151935           4         5\n",
      "25  1.072324  1.035241  0.151935           4         5\n",
      "           1        10         5  cor_period  quantile\n",
      "6   1.171541  2.171023  0.826320           2         5\n",
      "7   1.171541  2.171023  0.826320           2         5\n",
      "8   1.171541  2.171023  0.826320           2         5\n",
      "12 -0.306966  1.299807  0.702793           3         4\n",
      "13 -0.306966  1.299807  0.702793           3         4\n",
      "          1        10         5  cor_period  quantile\n",
      "6  1.171541  2.171023  0.826320           2         5\n",
      "7  1.171541  2.171023  0.826320           2         5\n",
      "8  1.171541  2.171023  0.826320           2         5\n",
      "3  0.760644  1.594924  0.356017           2         4\n",
      "4  0.760644  1.594924  0.356017           2         4\n"
     ]
    }
   ],
   "source": [
    "print result.nlargest(5, columns='1')\n",
    "print result.nlargest(5, columns='5')\n",
    "print result.nlargest(5, columns='10')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
